package top.jolyoulu.core.rdd.persist

import org.apache.spark.rdd.RDD
import org.apache.spark.storage.StorageLevel
import org.apache.spark.{SparkConf, SparkContext}

/**
 * Demonstrates RDD persistence: a (word, 1) pair RDD is marked for on-disk persistence
 * and then consumed by two separate actions.
 *
 * @Author: JolyouLu
 * @Date: 2024/2/7 20:51
 * @Description Persists `mapRDD` with `StorageLevel.DISK_ONLY` before it is reused by a
 *              `reduceByKey` job and a `groupByKey` job.
 */
object Spark01_RDD_Persist {

  /**
   * Builds a local SparkContext, splits two sample sentences into words, maps each word
   * to a `(word, 1)` pair, persists that pair RDD, and prints the results of a
   * `reduceByKey` job followed by a `groupByKey` job.
   *
   * @param args command-line arguments; not used
   */
  def main(args: Array[String]): Unit = {
    // Set up the environment; "local[*]" runs Spark locally using all available cores.
    val sparkConf: SparkConf = new SparkConf().setMaster("local[*]").setAppName("RDD")
    val sc: SparkContext = new SparkContext(sparkConf)

    try {
      val rdd: RDD[String] = sc.makeRDD(List("Hello Spark", "Hello World"))

      val flatRDD: RDD[String] = rdd.flatMap(_.split(" "))

      // The println makes every evaluation of the map step visible on the console.
      val mapRDD: RDD[(String, Int)] = flatRDD.map { word =>
        println("执行RDD-Map中！！！")
        (word, 1)
      }

      // persist only marks the RDD; the data is stored when the first action computes it,
      // so later actions can reuse the stored partitions instead of re-running the map.
      mapRDD.persist(StorageLevel.DISK_ONLY)

      // First action on mapRDD.
      val reduceRDD: RDD[(String, Int)] = mapRDD.reduceByKey(_ + _)
      reduceRDD.collect().foreach(println)
      println("===================================================")
      // Second action reusing the same mapRDD.
      val groupRDD: RDD[(String, Iterable[Int])] = mapRDD.groupByKey()
      groupRDD.collect().foreach(println)
    } finally {
      // Shut down the environment even when one of the jobs above fails.
      sc.stop()
    }
  }

}
